import os
import sys
from collections import defaultdict
from glob import glob

# Positional command-line arguments: the first is the glob pattern selecting
# the input files, the second is the path of the output file to write.
glob_files, loc_outfile = sys.argv[1], sys.argv[2]

def kaggle_bag(glob_files, loc_outfile, method="average", weights="uniform"):
    """Average the scores of several prediction files into one output file.

    Every file matched by ``glob_files`` is read as comma-separated text with
    one header line followed by ``id,score`` rows. Scores are averaged per id
    (over the rows in which that id appears) and written to ``loc_outfile`` as
    ``id,average,label`` rows, where ``label`` is 1 when the average is
    strictly greater than 0.5 and 0 otherwise. Rows are written in the row
    order of the last input file that contains each id.

    The header line is copied unchanged from the first non-empty input file,
    so it does not name the appended label column.

    Args:
        glob_files: Glob pattern selecting the input files. Matches are
            processed in sorted order; ``loc_outfile`` itself is skipped if
            the pattern happens to match it.
        loc_outfile: Path of the file to write (overwritten if it exists).
        method: Aggregation method. Only ``"average"`` is implemented.
        weights: Accepted for interface compatibility; not used by the
            current implementation.

    Raises:
        ValueError: If ``method`` is not ``"average"``, or if a score cannot
            be parsed as a float.
        IndexError: If a non-blank data row has no comma-separated score.
    """
    if method != "average":
        # Fail before the output file gets truncated.
        raise ValueError("unsupported method: {!r}".format(method))

    # Expand the pattern *before* creating the output file and drop the
    # output path from the matches: otherwise a pattern such as "*.csv"
    # would pick up the (empty or stale) output file as an input.
    out_path = os.path.abspath(loc_outfile)
    in_paths = [
        path for path in sorted(glob(glob_files))
        if os.path.abspath(path) != out_path
    ]

    ranks = {}                    # id -> row position in the last file seen
    totals = defaultdict(float)   # id -> sum of scores
    counts = defaultdict(int)     # id -> number of scores summed
    header = None

    for path in in_paths:
        print("parsing: {}".format(path))
        with open(path) as infile:
            lines = infile.readlines()
        if not lines:
            print("skipping empty file: {}".format(path))
            continue
        if header is None:
            header = lines[0]

        position = 0
        for raw_line in lines[1:]:
            line = raw_line.strip()
            if not line:
                # Tolerate blank / trailing empty lines.
                continue
            row = line.split(",")
            row_id = row[0]
            totals[row_id] += float(row[1])
            counts[row_id] += 1
            ranks[row_id] = position
            position += 1

    with open(loc_outfile, "w") as outfile:
        if header is not None:
            outfile.write(header)
        # sorted() is stable, so ids with equal rank keep first-seen order.
        for row_id in sorted(totals, key=lambda key: ranks[key]):
            mean = totals[row_id] / counts[row_id]
            label = 1 if mean > 0.5 else 0
            outfile.write("%s,%f,%i\n" % (row_id, mean, label))
    print("wrote to {}".format(loc_outfile))

# Run the bagging with the two command-line arguments read at the top of the
# file, leaving `method` and `weights` at their defaults.
kaggle_bag(glob_files=glob_files, loc_outfile=loc_outfile)